#----------------------------------------------------------------------------------------------------------------------------------*
  
# Replication_DempseyIonescu_Summarystats.R contains codes for the summary tables in the paper shown in tables 1 and 2

#----------------------------------------------------------------------------------------------------------------------------------*

library(tidyverse)
library(haven)
library(Hmisc)
library(xlsx)

setwd("/data/***")

default_probs <- read_dta("average_apr_calculations_closed.dta")

##########################################################################################################################
# 
# Variable generation ----------------------------------------------------------------------------------------------------
#
##########################################################################################################################

#The following variables are created or defined in either Replication_DempseyIonescu_ProbitRegression.do or Replication_DempseyIonescu_OLSRegression.do:

# -uniqueid
# -mean_borrower_income
# -fico_mean_2018
# -apr_mean_2018
# -mean_spread
# -one_def, two_defs, three_defs
# -bank_fe_phat
# -all_fe_three_phat
# -mult_flag
# -revolver
# -mean_revolved_balance
# -revolver_count
# -new_account


# New variables used in this script are defined below:

# -q1_revolve, q2_revolve, q3_revolve, q4_revolve: A dummy indicating if a variable has revolved for at least N quarters out of the year sample. Calculated using revolver_count as follows:

default_probs <- default_probs %>% mutate(q1_revolve = ifelse(revolver_count >= 3, 1, 0),
                                          q2_revolve = ifelse(revovler_count >= 6, 1, 0),
                                          q3_revolve = ifelse(revovler_count >= 9, 1, 0),
                                          q4_revolve = ifelse(revovler_count >= 12, 1, 0))

# -mean_balance: Taken as the mean value over 2018 for the Y-14M variable cycleendingbalance, line item 15. Missing and negative values of cycleedningbalance are ignored.

default_probs <- default_probs %>% group_by(uniqueid) %>% mutate(mean_balance = mean(ifelse(cycleendingbalance >= 0, cycleendingbalance, NA), na.rm = T))
  
##########################################################################################################################
#
# Table generation ------------------------------------------------------------------------------------------------------
#
##########################################################################################################################

#Table 1 ----------------------------------------------------------------------------------------------------------------

#Custom functions are created for efficiency

mean_na <- function(x) {mean(x, na.rm = T)}
sd_na <- function(x) {sd(x, na.rm = T)}

#Mean and SD are taken of requested variables, stored in aggregate
#one_def is bankruptcy, two_defs is default rate, measured as bankruptcy flag and chargeoff as reason for account closure, and three_defs includes delinquency

aggregate <- default_probs %>%
  summarise(across(c(one_def, two_defs, three_defs, revolver, q1_revolve, q2_revolve, q3_revolve, q4_revolve), list(mean = mean_na, sd = sd_na
  )))

#For revolver count, value is conditional on being a revolver

aggregate_revolvers <- default_probs %>%
  filter(revolver == 1) %>%
  summarise(across(c(revolver_count), list(mean = mean_na, sd = sd_na
  )))

table_1 <- rbind(aggregate, aggregate_revolvers)

write_csv(table_1, "output/table_1.csv")

#Figures for Table 1 are read off table_1.csv

#Table 2 ----------------------------------------------------------------------------------------------------------------

#Custom functions are created for efficiency

p10_na <- function(x) {quantile(x, probs = 0.1, na.rm = T)}
p50_na <- function(x) {quantile(x, probs = 0.5, na.rm = T)}
p90_na <- function(x) {quantile(x, probs = 0.9, na.rm = T)}
mean_end <- function(x) {weighted.mean(x, w = default_probs$mean_balance, na.rm = T)}
sd_end <- function(x) {sqrt(wtd.var(x, w = default_probs$mean_balance, na.rm = T))}
p10_end <- function(x) {weighted.quantile(x, w = default_probs$mean_balance, prob = 0.1)}
p50_end <- function(x) {weighted.quantile(x, w = default_probs$mean_balance, prob = 0.5)}
p90_end <- function(x) {weighted.quantile(x, w = default_probs$mean_balance, prob = 0.9)}

#Mean, SD, and percentiles are taken of requested variables, stored in aggregate
#The "mean" term in variable names refers to the fact that these values reflect the mean across 2018

aggregate <- default_probs %>%
  mutate(account_age = account_age/90) %>%
  summarise(across(c(mean_balance, mean_borrower_income, fico_mean_2018, account_age),
                   list(mean = mean_na, sd = sd_na, p10 = p10_na, p50 = p50_na, p90 = p90_na
  )))

#For necessary variables, value is conditional on being a revolver

aggregate_revolvers <- default_probs %>%
  filter(revolver == 1) %>%
  mutate(account_age = account_age/90) %>%
  summarise(across(c(mean_revolved_balance, mean_borrower_income, fico_mean_2018, account_age), 
                   list(mean = mean_na, sd = sd_na, p10 = p10_na, p50 = p50_na, p90 = p90_na
  )))

#Other values also require weighting by cycle end balance

aggregate_end <- default_probs %>%
  summarise(across(c(apr_mean_2018, mean_spread), 
                   list(mean = mean_na, sd = sd_na, p10 = p10_end, p50 = p50_end, p90 = p90_end
                   )))

table_2 <- rbind(aggregate, aggregate_revolvers, aggregate_end)

write_csv(table_2, "output/table_2.csv")

#Figures for Table 2 are read off table_2.csv